# Copyright 2016-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Some changes copyright 2021-present Matthias Valvekens,
# licensed under the license of the pyHanko project (see LICENSE file).


"""An implementation of RFC4013 SASLprep."""

__all__ = ["saslprep"]

import stringprep
import unicodedata
from collections.abc import Callable

from babeldoc.pdfminer.pdfexceptions import PDFValueError

# RFC4013 section 2.3 prohibited output.
_PROHIBITED: tuple[Callable[[str], bool], ...] = (
    # A strict reading of RFC 4013 requires table c12 here, but
    # characters from it are mapped to SPACE in the Map step. Can
    # normalization reintroduce them somehow?
    stringprep.in_table_c12,
    stringprep.in_table_c21_c22,
    stringprep.in_table_c3,
    stringprep.in_table_c4,
    stringprep.in_table_c5,
    stringprep.in_table_c6,
    stringprep.in_table_c7,
    stringprep.in_table_c8,
    stringprep.in_table_c9,
)


def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
    """An implementation of RFC4013 SASLprep.
    :param data:
        The string to SASLprep.
    :param prohibit_unassigned_code_points:
        RFC 3454 and RFCs for various SASL mechanisms distinguish between
        `queries` (unassigned code points allowed) and
        `stored strings` (unassigned code points prohibited). Defaults
        to ``True`` (unassigned code points are prohibited).
    :return: The SASLprep'ed version of `data`.
    """
    if prohibit_unassigned_code_points:
        prohibited = _PROHIBITED + (stringprep.in_table_a1,)
    else:
        prohibited = _PROHIBITED

    # RFC3454 section 2, step 1 - Map
    # RFC4013 section 2.1 mappings
    # Map Non-ASCII space characters to SPACE (U+0020). Map
    # commonly mapped to nothing characters to, well, nothing.
    in_table_c12 = stringprep.in_table_c12
    in_table_b1 = stringprep.in_table_b1
    data = "".join(
        [
            "\u0020" if in_table_c12(elt) else elt
            for elt in data
            if not in_table_b1(elt)
        ],
    )

    # RFC3454 section 2, step 2 - Normalize
    # RFC4013 section 2.2 normalization
    data = unicodedata.ucd_3_2_0.normalize("NFKC", data)

    in_table_d1 = stringprep.in_table_d1
    if in_table_d1(data[0]):
        if not in_table_d1(data[-1]):
            # RFC3454, Section 6, #3. If a string contains any
            # RandALCat character, the first and last characters
            # MUST be RandALCat characters.
            raise PDFValueError("SASLprep: failed bidirectional check")
        # RFC3454, Section 6, #2. If a string contains any RandALCat
        # character, it MUST NOT contain any LCat character.
        prohibited = prohibited + (stringprep.in_table_d2,)
    else:
        # RFC3454, Section 6, #3. Following the logic of #3, if
        # the first character is not a RandALCat, no other character
        # can be either.
        prohibited = prohibited + (in_table_d1,)

    # RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
    for char in data:
        if any(in_table(char) for in_table in prohibited):
            raise PDFValueError("SASLprep: failed prohibited character check")

    return data
